library(tidyverse)
library(ccesMRPprep)
stopifnot(packageVersion("ccesMRPprep") >= "0.1.8.900")
library(haven)
library(dataverse)
library(fs)
library(dplyr)
library(survey)
# Two passes over the same pipeline: "full" and "nocontradict". Each pass
# writes its own pair of output files at the end of the loop body.
runs <- c("full", "nocontradict")
for (x in runs) {

    # file1 receives the validated two-party voter subset (vv_2pty),
    # file2 receives the complete standardized data (cc_df).
    file1 <- switch(
        x,
        full = "data/output/ccc_2016-2020_voted_2pty.rds",
        nocontradict = "data/output/ccc_2016-2020_voted_2pty_noc.rds"
    )
    file2 <- switch(
        x,
        full = "data/output/ccc_2016-2020.rds",
        nocontradict = "data/output/ccc_2016-2020_noc.rds"
    )

    # Load the CCES cumulative file restricted to 2016 and 2020, preferring a
    # local cached copy over a fresh Dataverse download.
    dv_download_path <- "data/input/ccc_2016-2020_dataverse-original.rds"
    if (file_exists(dv_download_path)) {
        cc_16_20_full <- read_rds(dv_download_path)
    } else {
        cc_16_20_full <- get_cces_dataverse("cumulative") |>
            filter(year %in% c(2016, 2020))

        # Cache the download for later passes/runs. Best effort: the cache is
        # only written when the input directory already exists.
        if (dir_exists(path_dir(dv_download_path))) {
            write_rds(cc_16_20_full, dv_download_path)
        }
    }

    # Drop respondents flagged as contradictors, then re-rake the 2020 weights
    # on the remaining sample.

    if (x == "nocontradict") {
        # Item-level policy questions come from the year-specific common
        # content files; policing is not present in the cumulative content.
        dat20 <- get_dataframe_by_name(
            filename = "CES20_Common_OUTPUT_vv.dta",
            dataset = "10.7910/DVN/E9N6PH",
            original = TRUE,
            .f = haven::read_dta,
            server = "dataverse.harvard.edu"
        )
        dat16 <- read_dta("data/input/CCES16_Common_OUTPUT_Feb2018_VV.dta")
        dat16$year <- 2016
        dat20$year <- 2020

        # Give the respondent id the same name in both files so it can be used
        # as a merge key (together with year) against the cumulative file.
        colnames(dat16)[colnames(dat16) == "V101"] <- "case_id"
        colnames(dat20)[colnames(dat20) == "caseid"] <- "case_id"

        # First flag: set only for 2020 (always 0 for 2016), when both
        # CC20_334c and CC20_334d equal 1.
        # NOTE(review): presumably the policing items mentioned above --
        # confirm against the codebook.
        dat16$inconsistent <- 0
        dat20$inconsistent <- 0
        dat20$inconsistent[dat20$CC20_334c == 1 & dat20$CC20_334d == 1] <- 1

        # Second flag, for the abortion items: both 332a and 332f equal 1.
        dat16$inconsistenta <- 0
        dat16$inconsistenta[dat16$CC16_332a == 1 & dat16$CC16_332f == 1] <- 1

        dat20$inconsistenta <- 0
        dat20$inconsistenta[dat20$CC20_332a == 1 & dat20$CC20_332f == 1] <- 1

        # Create a type variable that combines both flags. 2016 only has the
        # abortion flag, so it can never reach "2 contradictions".
        dat16$type <- "1 contradiction"
        dat16$type[dat16$inconsistenta == 0] <- "No contradictions"

        dat20$type <- "1 contradiction"
        dat20$type[dat20$inconsistent == 0 & dat20$inconsistenta == 0] <- "No contradictions"
        dat20$type[dat20$inconsistent == 1 & dat20$inconsistenta == 1] <- "2 contradictions"

        # Keep only the merge keys and flags BEFORE stacking the two years, so
        # the full survey files (hundreds of differently typed/labelled
        # columns) never have to be row-bound.
        flag_cols <- c("year", "case_id", "inconsistent", "inconsistenta", "type")
        inconsistents_append <- bind_rows(
            dplyr::select(dat20, all_of(flag_cols)),
            dplyr::select(dat16, all_of(flag_cols))
        )

        # all.x = TRUE keeps cumulative-file respondents without an item-level
        # match (their flags are NA), but filter() drops rows whose condition
        # evaluates to NA, so those respondents are removed here as well.
        # NOTE(review): confirm that dropping unmatched respondents is intended.
        cc_16_20_full <- merge(cc_16_20_full, inconsistents_append, by = c("year", "case_id"), all.x = TRUE)
        cc_16_20_full <- cc_16_20_full %>% filter(inconsistent != 1 & inconsistenta != 1)

        # Create list of targets from ACS data. Each vector is positional and
        # is matched by name to a column of the data when raking.
        # (weduc sums to 0.99 and region to 1.01 as written -- rounding.)
        gender <- c(0.49, 0.51)
        agecat <- c(0.21, 0.17, 0.16, 0.17, 0.15, 0.14)
        race5 <- c(0.63, 0.12, 0.16, 0.06, 0.03)
        educ <- c(0.12, 0.28, 0.22, 0.08, 0.19, 0.11)
        weduc <- c(0.22, 0.20, 0.13, 0.08, 0.17, 0.11, 0.05, 0.03)
        region <- c(0.18, 0.21, 0.38, 0.24)
        prez16 <- c(.339, .336, .053, .272) # 2016 prez vote targets only if needed

        ## Age recode
        # Cut points for the six age categories
        breaks <- c(18, 30, 40, 50, 60, 70, Inf)

        # Define labels for the categories
        labels <- c(1, 2, 3, 4, 5, 6)

        # Create "agecat" by cutting "age" into categories 1-6.
        # NOTE(review): cut() defaults to right-closed intervals, so an age
        # exactly on a cut point (30, 40, ...) lands in the LOWER category,
        # e.g. category 1 is [18, 30]. Confirm this matches the target bins.
        cc_16_20_full$agecat <- as.numeric(cut(cc_16_20_full$age, breaks = breaks, labels = labels, include.lowest = TRUE))

        # Race: codes 1-4 kept as is, every code above 4 pooled into 5
        cc_16_20_full$race5 <- cc_16_20_full$race
        cc_16_20_full$race5[cc_16_20_full$race > 4] <- 5

        # Education collapsed to 4 categories (used for the interaction below)
        cc_16_20_full$educ4 <- car::recode(cc_16_20_full$educ, "1:2=1; 3:4=2; 5=3; 6=4")

        # White x education interaction: 1-4 = race5 == 1 by educ4,
        # 5-8 = race5 > 1 by educ4; NA when either input is missing.
        cc_16_20_full$weduc <- NA
        cc_16_20_full$weduc[cc_16_20_full$race5 == 1 & cc_16_20_full$educ4 == 1] <- 1
        cc_16_20_full$weduc[cc_16_20_full$race5 == 1 & cc_16_20_full$educ4 == 2] <- 2
        cc_16_20_full$weduc[cc_16_20_full$race5 == 1 & cc_16_20_full$educ4 == 3] <- 3
        cc_16_20_full$weduc[cc_16_20_full$race5 == 1 & cc_16_20_full$educ4 == 4] <- 4
        cc_16_20_full$weduc[cc_16_20_full$race5 > 1 & cc_16_20_full$educ4 == 1] <- 5
        cc_16_20_full$weduc[cc_16_20_full$race5 > 1 & cc_16_20_full$educ4 == 2] <- 6
        cc_16_20_full$weduc[cc_16_20_full$race5 > 1 & cc_16_20_full$educ4 == 3] <- 7
        cc_16_20_full$weduc[cc_16_20_full$race5 > 1 & cc_16_20_full$educ4 == 4] <- 8

        # Region: attach the state-to-region crosswalk. This is an inner merge,
        # so rows whose state is absent from the crosswalk are dropped.
        # NOTE(review): presumably the crosswalk supplies the `region` column
        # used as a raking variable below -- confirm.
        region_xwalk <- read_dta("data/input/state_region_xwalk.dta")
        cc_16_20_full <- merge(cc_16_20_full, region_xwalk, by = "state")

        # 2016 presidential vote in 4 categories: codes 1 and 2 kept,
        # 3/5/6 pooled into 3, code 4 or missing into 4.
        cc_16_20_full <- cc_16_20_full %>%
            mutate(prez16 = case_when(
                voted_pres_16 %in% c(1, 2) ~ voted_pres_16,
                voted_pres_16 %in% c(3, 5, 6) ~ 3,
                is.na(voted_pres_16) | voted_pres_16 == 4 ~ 4,
                TRUE ~ NA_real_
            ))

        # Named list of raking targets; names must match the data columns.
        targets <- list(
            gender = gender,
            agecat = agecat,
            race5 = race5,
            educ = educ,
            weduc = weduc,
            prez16 = prez16,
            region = region
        )

        # Attached here so it is only loaded in the pass that rakes.
        library(anesrake)

        # Rake the 2020 respondents only, then write the new weights back.
        # The assignment is positional: cc_20_full keeps the row order of the
        # 2020 rows in cc_16_20_full, so the weight vector lines up with them.
        cc_20_full <- cc_16_20_full %>% filter(year == 2020)
        outsave <- anesrake(
            targets, cc_20_full,
            caseid = cc_20_full$case_id,
            cap = 8, type = "nolim"
        )
        # First element of the anesrake result is the raked weight vector.
        cc_16_20_full$weight[cc_16_20_full$year == 2020] <- outsave[[1]]
    }

    # Build the analysis data: standardize demographics, keep the analysis
    # columns, turn labelled columns into factors, then derive the geography,
    # race and party variables step by step.
    cc_df <- cc_16_20_full |>
        ccc_std_demographics(bh_as_hisp = TRUE, wh_as_hisp = TRUE) |>
        mutate(female = as.integer(gender == 2)) |>
        dplyr::select(
            year, case_id, state, cd, zipcode, county_fips,
            matches("weight"),
            age, gender, race, hispanic, educ, female,
            matches("pid"),
            matches("_pres_party"),
            matches("_pres_16"),
            matches("_pres_12"),
            vv_turnout_gvm, vv_party_gen
        ) |>
        mutate(across(where(is.labelled), as_factor)) |>
        # First two characters of the congressional district code
        mutate(st = str_sub(cd, 1, 2)) |>
        # Race: keep the original coding, then collapse to four levels and
        # add indicators (White is the omitted category)
        mutate(
            race_orig = race,
            race = fct_collapse(race, `Other` = c("Native American", "Asian", "All Other")),
            race = fct_relevel(race, "White", "Black", "Hispanic", "Other")
        ) |>
        mutate(
            race_Black = as.integer(race == "Black"),
            race_Hispanic = as.integer(race == "Hispanic"),
            race_Other = as.integer(race == "Other")
        ) |>
        # Party id with leaners: D / R, everything else (including missing,
        # recoded to "Missing" first) falls into "Other"
        mutate(
            pid3_leaner = replace_na(as.character(pid3_leaner), "Missing"),
            pid3_leaner = recode_factor(
                pid3_leaner,
                `Democrat (Including Leaners)` = "D",
                `Republican (Including Leaners)` = "R",
                .default = "Other"
            )
        ) |>
        # Party from vv_party_gen: D / R kept, all other levels lumped together
        mutate(
            vv_party = fct_recode(vv_party_gen, D = "Democratic Party", R = "Republican Party"),
            vv_party = fct_other(vv_party, keep = c("D", "R")),
            vv_party_R = as.integer(vv_party == "R"),
            vv_party_D = as.integer(vv_party == "D")
        )

    # Keep respondents whose turnout is "Voted" and whose presidential vote
    # went to one of the two major parties; `trump` is TRUE for Republican votes.
    vv_2pty <- filter(
        cc_df,
        vv_turnout_gvm == "Voted",
        voted_pres_party %in% c("Democratic", "Republican")
    )
    vv_2pty <- mutate(vv_2pty, trump = voted_pres_party == "Republican")

    # Write both data sets to this pass's output paths
    write_rds(vv_2pty, file1)
    write_rds(cc_df, file2)
}
